from pyspark import SparkConf, SparkContext
import os

# Tell PySpark which Python interpreter to use for the workers
os.environ['PYSPARK_PYTHON'] = "E:\\Python310\\python.exe"

# Run locally using all available cores, then build the SparkContext
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")
sc = SparkContext(conf=conf)

# Prepare an RDD from a Python list
rdd = sc.parallelize([1, 2, 3, 4, 5])


# Use map to multiply every element by 10, then chain a second map to add 5
rdd2 = rdd.map(lambda data: data * 10).map(lambda data: data + 5)
print(rdd2.collect())  # [15, 25, 35, 45, 55]
# map accepts a function of type (T) -> U: the output type may differ from the input type.
# Here both lambdas are (T) -> T, mapping int to int.
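# A minimal sketch (not part of the original script): the output type U can
# differ from the input type T. Here each int is mapped to a formatted string.
rdd_str = rdd.map(lambda data: f"value={data}")
print(rdd_str.collect())  # ['value=1', 'value=2', 'value=3', 'value=4', 'value=5']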

# Chained calls: each map returns a new RDD, so calls can be chained, for example:
# rdd2.map(lambda data: data + 5)

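# For reference, a sketch of the same pipeline written without chaining,
# assigning each intermediate RDD to its own variable (names are illustrative):
rdd_times_ten = rdd.map(lambda data: data * 10)
rdd_plus_five = rdd_times_ten.map(lambda data: data + 5)
print(rdd_plus_five.collect())  # [15, 25, 35, 45, 55]

# Stop the SparkContext once the job is done
sc.stop()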